# .........................................................................
# Title: security_regressions.py
#
# Estimates baseline security-level regressions using both OLS and 2SLS
# methodologies; also implements robustness estimates
# .........................................................................

import pandas as pd
import numpy as np
import subprocess
import econtools.metrics as mt
from econtools.util.frametools import group_id

# directories
PROJECT_PATH = "<PROJECT_PATH>/"
CODE_PATH = "<CODE_PATH>/"
TMP = PROJECT_PATH + "temp/"
TABLES = PROJECT_PATH + "tables/"
RAW = PROJECT_PATH + "raw/"

# ---------------------------------------
# Prep data
# ---------------------------------------

# events list
events = ['gr', 'covid', 'p11', 'p16']

# read in the drawdowns
py_drawdowns_hedged = {x: pd.read_stata(TMP + "drawdowns_frompy_{}_hedged.dta".format(x)) for x in events}
py_drawdowns = {x: pd.read_stata(TMP + "drawdowns_frompy_{}.dta".format(x)) for x in events}
fw_drawdowns = {x: pd.read_stata(TMP + "fw_drawdowns_{}.dta".format(x)) for x in events}

# add event markers
for event in events:    
    py_drawdowns_hedged[event]['event'] = event
    py_drawdowns[event]['event'] = event
    fw_drawdowns[event]['event'] = event
    
# consolidate dataframes
py_drawdowns_hedged = pd.concat(py_drawdowns_hedged).reset_index()
py_drawdowns = pd.concat(py_drawdowns).reset_index()
fw_drawdowns = pd.concat(fw_drawdowns).reset_index()

# dropping unneeded columns
py_drawdowns_hedged.drop(columns=['level_0', 'level_1', 'index'], inplace=True)
py_drawdowns.drop(columns=['level_0', 'level_1', 'index'], inplace=True)
fw_drawdowns.drop(columns=['level_0', 'level_1', 'cum_hedged_return1', 'cum_hedged_return2'], inplace=True)

# renaming columns
py_drawdowns_hedged.rename(columns={'py_drawdown': 'hedged_drawdown_py'}, inplace=True)
py_drawdowns.rename(columns={'py_drawdown': 'drawdown_py'}, inplace=True)
fw_drawdowns.rename(columns={'fw_hedged_drawdown': 'hedged_drawdown_fw'}, inplace=True)

# all merged
all_drawdowns = py_drawdowns_hedged.merge(
    py_drawdowns, on=['cusip', 'event'], how='outer'
).merge(
    fw_drawdowns, on=['cusip', 'event'], how='outer'
)

# list of drawdown variables
drawdown_vars = ['hedged_drawdown_py', 'hedged_drawdown_fw', 'drawdown_py']
compact_drawdowns = all_drawdowns[['cusip', 'event'] + drawdown_vars]
for var in drawdown_vars:
    compact_drawdowns['log_' + var] = np.log(compact_drawdowns[var])

# ---------------------------------------
# Add covariates
# ---------------------------------------

# trading gaps
trading_gaps = {x: pd.read_stata(TMP + "event_forpy_{}.dta".format(x)) for x in events}
for event in events:
    trading_gaps[event]['event'] = event
trading_gaps = pd.DataFrame(pd.concat(trading_gaps).reset_index().groupby(['cusip', 'event'])['has_trading_gaps'].max().astype(int)).reset_index()

# covariates
covariates = pd.read_stata(TMP + "event_regframe_covariates.dta")
covariates['event'] = covariates['event'].replace('great_recession', 'gr')
reg_data = compact_drawdowns.merge(covariates, on=['event', 'cusip'], how='inner').replace([np.inf, -np.inf], np.nan)
reg_data = reg_data.merge(trading_gaps, on=['event', 'cusip'], how='left')

# ---------------------------------------
# Regression implementation
# ---------------------------------------

# specification
outcome = 'log_hedged_drawdown_py'
regressor = 'share_ins_issuance'
cluster_var = 'firm_event_cluster'
weights = 'value_outstanding'
fe_specs = ['event', 'duration_event', 'duration_rating_event', 'fdr_event', 'full_fe_category']

# working data
work_data = reg_data[[outcome, regressor, cluster_var, weights] + fe_specs + ['cusip']]
work_data = work_data.replace([np.inf, -np.inf], np.nan).dropna()
reg_data.to_stata(TMP + "variation_histograms_sample.dta")

# regressions
working_regs = {}
working_regs['event'] = mt.reg(reg_data, outcome, regressor, fe_name='event', awt_name=weights, cluster=cluster_var)
working_regs['duration_event'] = mt.reg(reg_data, outcome, regressor, fe_name='duration_event', awt_name=weights, cluster=cluster_var)
working_regs['duration_rating_event'] = mt.reg(reg_data, outcome, regressor, fe_name='duration_rating_event', awt_name=weights, cluster=cluster_var)
working_regs['fdr_event'] = mt.reg(reg_data, outcome, regressor, fe_name='fdr_event', awt_name=weights, cluster=cluster_var)
working_regs['full_fe_category'] = mt.reg(reg_data, outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)

# number of identifying issuers
def get_identifying_issuers(fixed_eff, df, regs_dict):
    return len(set(df[regs_dict[fixed_eff].sample]['consolidated_issuer_id']))
n_id_issuers = {x: get_identifying_issuers(x, df=reg_data, regs_dict=working_regs) for x in fe_specs}

# collect results to dataframe
working_results = pd.DataFrame([
    {k: round(v.beta.values[0], 2) for k,v in working_regs.items()},
    {k: round(v.se.values[0], 2) for k,v in working_regs.items()},
    {k: round(v.r2, 2) for k,v in working_regs.items()},
    {k: int(v.N) for k,v in working_regs.items()},
    {k: int(n_id_issuers[k]) for k,v in working_regs.items()},
], index=['beta', 'se', 'r2', 'N', 'issuers'])

# construct and output OLS panel for display (Table 3)
dummy_labels = {
    'Event Dummy': {'event': 'Yes', 'duration_event': 'Yes', 'duration_rating_event': 'Yes', 'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Size' : {'event': '--', 'duration_event': 'Yes', 'duration_rating_event': 'Yes', 'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Duration' : {'event': '--', 'duration_event': 'Yes', 'duration_rating_event': 'Yes', 'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Rating' : {'event': '--', 'duration_event': '--', 'duration_rating_event': 'Yes', 'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Issuer Dummy' : {'event': '--', 'duration_event': '--', 'duration_rating_event': '--', 'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Callable Dummy' : {'event': '--', 'duration_event': '--', 'duration_rating_event': '--', 'fdr_event': '--', 'full_fe_category': 'Yes'},
    'Senior Dummy' : {'event': '--', 'duration_event': '--', 'duration_rating_event': '--', 'fdr_event': '--', 'full_fe_category': 'Yes'},
    'Coupon Type' : {'event': '--', 'duration_event': '--', 'duration_rating_event': '--', 'fdr_event': '--', 'full_fe_category': 'Yes'},
}
ols_panel = working_results.T
for dummy in dummy_labels.keys():
    ols_panel[dummy] = dummy_labels[dummy]
ols_panel = ols_panel.rename(columns={'beta': 'Insurer Share', 'se': 'Standard Error', 'N': 'Identifying Observations', 'issuers': 'Identifying Issuers', 'r2': 'R2'})
row_display_order = ['Insurer Share', 'Standard Error', 'Event Dummy', 'Bond Size', 'Bond Duration', 'Bond Rating', 'Issuer Dummy', 'Callable Dummy', 'Senior Dummy', 'Coupon Type', 'Identifying Observations', 'Identifying Issuers', 'R2']
ols_panel = ols_panel.reindex(row_display_order, axis=1).T
ols_panel.columns = ['(1)', '(2)', '(3)', '(4)', '(5)']
ols_panel.to_string(TABLES + "/ols_panel.txt")

# ---------------------------------------
# Additional data for robustness
# ---------------------------------------

# load security master
security_master = pd.read_stata(RAW + "cmns/gcap_security_master_cusip.dta")

# callable dummy
stat_characteristics = pd.read_stata(TMP + "consolidated_static_dummies.dta")
stat_characteristics = stat_characteristics[['cusip', 'bc_is_callable']].rename(columns={'bc_is_callable': 'callable'}).dropna()
stat_characteristics.callable = stat_characteristics.callable.astype(int)

# add data
reg_data_rb = reg_data.merge(security_master[security_master.cusip.isin(reg_data.cusip)], on=['cusip'], how='left')
reg_data_rb = reg_data_rb.merge(stat_characteristics, on=['cusip'], how='left')

# bond betas
beta_spec = 'beta24m_w'
betas = pd.read_stata(TMP + "rolling_betas_m.dta")
reg_data_rb_betas = reg_data_rb.merge(betas[['cusip', 'date_m', beta_spec]], on=['cusip', 'date_m'], how='left')

# process covenants
covenants = pd.read_stata(TMP + "mergent_covenants.dta")
cov_categories = ['negative_pledge_covenant', 'cross_default', 'cross_acceleration', 'change_control_put_provisions', 'rating_decline_trigger_put', 'declining_net_worth', 
    'after_acquired_property_clause', 'asset_sale_clause', 'consolidation_merger', 'sale_assets', 'investments', 'maintenance_net_worth', 'indebtedness', 
    'funded_debt', 'dividends_related_payments', 'restricted_payments', 'liens', 'sales_leaseback', 'senior_debt_issuance', 'subordinated_debt_issuance', 
    'stock_issuance_issuer', 'stock_transfer_sale_disp', 'transaction_affiliates', 'net_earnings_test_issuance', 'fixed_charge_coverage', 'leverage_test', 
    'borrowing_restricted', 'legal_defeasance', 'defeasance_wo_tax_conseq', 'covenant_defeas_wo_tax_conseq', 'economic_cov_def']
for category in cov_categories:
    covenants[category] = covenants[category].fillna(0).astype(int)
covenants['covenant_string'] = ''
for category in cov_categories:
    covenants['covenant_string'] += covenants[category].astype(str)
assert set(covenants['covenant_string'].map(lambda x: len(x))) == {len(cov_categories)}
assert set.union(*covenants['covenant_string'].map(lambda x: set(x)).values) == {'0', '1'}
covenant_strings = covenants[['cusip', 'covenant_string']]

# sample with covenants
reg_data_cvn = reg_data_rb.merge(covenant_strings, on='cusip', how='left')
reg_data_cvn.covenant_string = reg_data_cvn.covenant_string.fillna('0'*len(cov_categories))
reg_data_cvn_m = group_id(reg_data_cvn, cols=['full_fe_category', 'covenant_string'], name='full_fe_category_cvn', merge=True)

# lead underwriters data
underwriters = pd.read_stata(TMP + "mergent_lead_underwriters.dta")
uw_groups = underwriters.groupby('cusip').agg({'lu_id': lambda x: set(x)}).reset_index()
uw_groups['uw_group'] = uw_groups.lu_id.astype(str)

# sample with underwriters
reg_data_uw = reg_data_rb.merge(uw_groups, on='cusip', how='left')
reg_data_uw = reg_data_uw.dropna(subset=['uw_group'])
reg_data_uw_m = group_id(reg_data_uw, cols=['full_fe_category', 'uw_group'], name='full_fe_category_uw', merge=True)

# industry
ind_df = pd.read_stata(TMP + "industry_master_sic.dta").rename(columns={'cusip6': 'cusip6_up_bg'})
ind_df.sic_uf = ind_df.sic_uf.astype(int)
reg_data_ind = reg_data_rb.merge(ind_df, on="cusip6_up_bg")
reg_data_nuf = reg_data_ind[reg_data_ind.sic_uf == 0]

# ---------------------------------------
# Robustness specifications
# ---------------------------------------

# further interactions
reg_data_rb['duration_int'] = reg_data_rb.duration_y.round().astype(int)
reg_data_rb['cat_d1'] = reg_data_rb.full_fe_category.astype(str) + "_" + reg_data_rb.duration_int.astype(str)
reg_data_rb['cat_144a'] = reg_data_rb.full_fe_category.astype(str) + "_" + reg_data_rb.bc_is_144a.astype(str)
reg_data_rb['bond_age'] = round((reg_data_rb.date_m - reg_data_rb.issuance_date).dt.days / 360).astype(int)
reg_data_rb['cat_age'] = reg_data_rb.full_fe_category.astype(str) + "_" + reg_data_rb.bond_age.astype(str)

# robustness
rb_regs = {}
rb_regs['baseline'] = mt.reg(reg_data_rb, outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['duration_1y'] = mt.reg(reg_data_rb, outcome, regressor, fe_name='cat_d1', awt_name=weights, cluster=cluster_var)
rb_regs['reg_144a'] = mt.reg(reg_data_rb, outcome, regressor, fe_name='cat_144a', awt_name=weights, cluster=cluster_var)
rb_regs['bond_age'] = mt.reg(reg_data_rb, outcome, regressor, fe_name='cat_age', awt_name=weights, cluster=cluster_var)
rb_regs['drop_zeros'] = mt.reg(reg_data_rb[reg_data_rb.share_ins_issuance > 0], outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['no_aig'] = mt.reg(reg_data_rb, outcome, 'share_ins_issuance_noaig', fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['no_hedging'] = mt.reg(reg_data_rb, 'log_drawdown_py', regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['drop_callables'] = mt.reg(reg_data_rb[reg_data_rb.callable == 0], outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['fixed_window'] = mt.reg(reg_data_rb, 'log_hedged_drawdown_fw', regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['no_trade_gaps'] = mt.reg(reg_data_rb[reg_data_rb.has_trading_gaps == 0], outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['proxy'] = mt.reg(reg_data_rb, outcome, 'nonfund_share', fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['betas'] = mt.reg(reg_data_rb_betas, outcome, [regressor, beta_spec], fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)
rb_regs['covenants'] = mt.reg(reg_data_cvn_m, outcome, regressor, fe_name='full_fe_category_cvn', awt_name=weights, cluster=cluster_var)
rb_regs['underwriters'] = mt.reg(reg_data_uw_m, outcome, regressor, fe_name='full_fe_category_uw', awt_name=weights, cluster=cluster_var)
rb_regs['fin_util'] = mt.reg(reg_data_nuf, outcome, regressor, fe_name='full_fe_category', awt_name=weights, cluster=cluster_var)

# collect results
rb_results = pd.DataFrame([
    {k: round(v.beta.values[0], 2) for k,v in rb_regs.items()},
    {k: round(v.se.values[0], 2) for k,v in rb_regs.items()},
    {k: round(v.r2, 2) for k,v in rb_regs.items()},
    {k: int(v.N) for k,v in rb_regs.items()},
    {k: round(v.ci_hi.values[0], 2) for k,v in rb_regs.items()},
    {k: round(v.ci_lo.values[0], 2) for k,v in rb_regs.items()},
], index=['beta', 'se', 'r2', 'N', 'beta_max', 'beta_min'])
rb_results.T.to_stata(TMP + "ols_robustness_estimates.dta");

# save proxy specification details
proxy_out = rb_results.T.reset_index()
proxy_out = proxy_out[proxy_out['index']=='proxy']
proxy_out['id_firms'] = get_identifying_issuers('proxy', df=reg_data_rb, regs_dict=rb_regs)
proxy_out.to_stata(TMP + "us_proxy_regression_results.dta")

# ---------------------------------------
# 2SLS Regressions
# ---------------------------------------

# load bartik instrument
instrument_stata_df = pd.read_stata(TMP + "security_bartik_instrument.dta")

# merge in shift-share instrument
iv_df = reg_data_rb.merge(instrument_stata_df, on='cusip', how='left', indicator=True)
iv_df = iv_df.drop(columns=['maturity_date'])

# function to run 2sls estimation via stata
def run_2sls_stata(df_path, outcome, regressor, z_name, fe_name, awt_name, cluster_name, res_out_path, sample_out_path, verbose=True):
    proc_res = subprocess.run(["stata-mp", "-b", CODE_PATH + "scripts/run_2sls.do", df_path, outcome, regressor, z_name, fe_name, awt_name, cluster_name, res_out_path, sample_out_path], capture_output=True)
    if verbose:
        print("Process returned with status code {}".format(proc_res.returncode))
    return None

# specification
instrument = 'instrument_norm'
cluster_var = 'firm_event_cluster'
firm_var = 'consolidated_issuer_id'

# run regressions
sample_in_path = TMP + "security_iv_sample.dta"
iv_df.to_stata(sample_in_path)
iv_results = {}
iv_sample_dummies = {}
for fe_spec in ['full_fe_category', 'fdr_event']:
    res_out_path = TMP + "iv_res_out_{}.dta".format(fe_spec)
    sample_out_path = TMP + "iv_sample_out_{}.dta".format(fe_spec)
    run_2sls_stata(
        df_path = sample_in_path, 
        outcome = outcome, 
        regressor = regressor, 
        z_name = instrument, 
        fe_name = fe_spec, 
        awt_name = weights, 
        cluster_name = cluster_var, 
        res_out_path = res_out_path, 
        sample_out_path = sample_out_path
    )
    iv_results[fe_spec] = pd.read_stata(res_out_path)
    iv_sample_dummies[fe_spec] = pd.read_stata(sample_out_path).astype(bool).values
    
# number of identifying issuers
for fe_spec in ['full_fe_category', 'fdr_event']:
    iv_results[fe_spec]['n_id_firms'] = len(set(iv_df[iv_sample_dummies[fe_spec]][firm_var]))

# concatenate results
iv_panel = pd.concat(iv_results).droplevel(level=1)

# construct and output 2SLS panel for display (Table 4)
dummy_labels_iv = {
    'Event Dummy': {'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Size' : {'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Duration' : {'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Bond Rating' : {'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Issuer Dummy' : {'fdr_event': 'Yes', 'full_fe_category': 'Yes'},
    'Callable Dummy' : {'fdr_event': '--', 'full_fe_category': 'Yes'},
    'Senior Dummy' : {'fdr_event': '--', 'full_fe_category': 'Yes'},
    'Coupon Type' : {'fdr_event': '--', 'full_fe_category': 'Yes'},
}
for dummy in dummy_labels_iv.keys():
    iv_panel[dummy] = dummy_labels_iv[dummy]
iv_panel = iv_panel.rename(columns={'beta': 'Insurer Share', 'se': 'Standard Error', 'N': 'Identifying Observations', 'n_id_firms': 'Identifying Issuers', 'r2': 'R2', 'F': 'First Stage F'})
iv_row_display_order = ['Insurer Share', 'Standard Error', 'Event Dummy', 'Bond Size', 'Bond Duration', 'Bond Rating', 'Issuer Dummy', 'Callable Dummy', 'Senior Dummy', 'Coupon Type', 'Identifying Observations', 'Identifying Issuers', 'R2', 'First Stage F']
iv_panel = iv_panel.reindex(iv_row_display_order, axis=1).T
baseline_cols = ols_panel[['(4)', '(5)']]
baseline_cols = baseline_cols.T
baseline_cols['First Stage F'] = '--'
baseline_cols = baseline_cols.T
baseline_cols.columns = ['OLS, (1)', 'OLS, (2)']
iv_panel.columns = ['2SLS, (3)', '2SLS, (4)']
iv_panel_full = baseline_cols.join(iv_panel, how='right')
iv_panel_full.to_string(TABLES + "/iv_panel.txt")
